In [1]:
#homework 4 -> Mateusz Kubita, plotly
In [2]:
import plotly
import pandas
import numpy
import re
import plotly.express as px
In [3]:
# Load the raw data from the CSV file that sits next to the notebook.
df = pandas.read_csv(filepath_or_buffer='philly_data.csv')
In [4]:
# Keep only the two columns the analysis needs: the sale date and the price.
df2 = df.loc[:, ['Sale Date', 'Sale Price/bid price']]
In [5]:
# Show the column labels of the two-column selection.
df2.columns
Out[5]:
Index(['Sale Date', 'Sale Price/bid price'], dtype='object')
In [6]:
# Give the columns short names: 'date' for the sale date, 'kwota' for the price.
df2 = df2.rename(
    {'Sale Date': 'date', 'Sale Price/bid price': 'kwota'},
    axis='columns',
)
In [7]:
# Preview the first rows after the rename; amounts are still text such as '$11,400'.
df2.head()
Out[7]:
date kwota
0 September 13 2016 $11,400
1 August 2 2016 $8,500
2 August 2 2016 $12,600
3 August 2 2016 $9,200
4 August 2 2016 $8,900
In [8]:
def split_it(cena):
    """Extract the first numeric amount from a price string.

    The amount is returned as text with its thousands separators intact, so
    '$11,400' gives '11,400'. Amounts without a comma ('$900') and amounts with
    several comma groups ('$1,200,000') are returned in full.

    Parameters
    ----------
    cena : str
        Raw price text, e.g. '$11,400'.

    Returns
    -------
    str
        The first run of digits, including any comma-separated groups.

    Raises
    ------
    ValueError
        If `cena` contains no digits at all.
    """
    # Raw string so that `\d` is a regex token and not an invalid string escape.
    # The comma groups are optional and repeatable, which covers both
    # sub-thousand and million-plus amounts.
    match = re.search(r"\d+(?:,\d+)*", cena)
    if match is None:
        raise ValueError(f"no numeric amount found in {cena!r}")
    return match.group(0)
In [9]:
# Cast both columns to pandas' nullable string dtype in one call.
df2 = df2.astype({
    'kwota': pandas.StringDtype(),
    'date': pandas.StringDtype(),
})
In [10]:
# Drop rows with a missing date or amount; rebinding keeps the cell re-runnable.
df2 = df2.dropna()
In [11]:
# Reduce every price string to its numeric part (still text, e.g. '11,400').
df2 = df2.assign(kwota=df2['kwota'].apply(split_it))
In [12]:
# Preview the first rows after stripping the currency symbol from the amounts.
df2.head()
Out[12]:
date kwota
0 September 13 2016 11,400
1 August 2 2016 8,500
2 August 2 2016 12,600
3 August 2 2016 9,200
4 August 2 2016 8,900
In [13]:
# Remove the thousands separators and convert the amounts to floats.
df2 = df2.assign(
    kwota=lambda frame: frame['kwota'].str.replace(',', '').astype(float)
)
In [14]:
# List the distinct sale dates present in the data.
df2['date'].unique()
Out[14]:
<StringArray>
['September 13  2016',     'August 2  2016',    'October 4  2016',
      'March 7  2017',   'February 7  2017']
Length: 5, dtype: string
In [15]:
# Compute the average amount for each sale date (the grouping key is the full
# date string, not just the month).
df3 = df2.groupby(by='date', as_index=False).mean()
In [16]:
# Show the average amount per sale date (rows are ordered by the date string).
df3
Out[16]:
date kwota
0 August 2 2016 53231.379310
1 February 7 2017 53500.000000
2 March 7 2017 57300.000000
3 October 4 2016 62235.849057
4 September 13 2016 59883.720930
In [17]:
# Month name -> month number, used to turn the month token of each date into a
# sortable integer. All twelve months are listed so that a month missing from
# the current data still maps to a number instead of silently becoming NaN
# when the mapping is applied with `.map(d)`.
d = {
    'January': 1,
    'February': 2,
    'March': 3,
    'April': 4,
    'May': 5,
    'June': 6,
    'July': 7,
    'August': 8,
    'September': 9,
    'October': 10,
    'November': 11,
    'December': 12,
}
In [18]:
# Check the column dtypes before deriving the month and year columns.
df3.dtypes
Out[18]:
date      string
kwota    float64
dtype: object
In [19]:
# Split on runs of whitespace (no explicit separator) so the result does not
# depend on how many spaces separate the tokens. The raw dates contain a double
# space ('September 13  2016'); splitting on a single ' ' put the year at
# position 3 only because of the empty token that double space produced.
df3['month'] = df3['date'].str.split().str[0]
# The year is the last token regardless of spacing.
df3['year'] = df3['date'].str.split().str[-1]
In [20]:
# Replace each month name with its month number using the lookup dict `d`.
df3['month'] = df3['month'].map(d)
In [21]:
# Check the dtypes again after adding the 'month' and 'year' columns.
df3.dtypes
Out[21]:
date      string
kwota    float64
month      int64
year      object
dtype: object
In [22]:
# Order the rows chronologically: by year first, then by month number.
df3 = df3.sort_values(by=['year', 'month'])
In [23]:
# Overwrite 'date' with a compact "<year> <month>" label used on the chart axis.
df3['date'] = df3['year'].astype("string").str.cat(
    df3['month'].astype("string"),
    sep=" ",
)
In [24]:
# Renumber the rows 0..n-1 so the index follows the sorted order.
df3 = df3.set_axis(pandas.RangeIndex(len(df3)), axis="index")
In [25]:
# Show the final table that feeds the chart.
df3
Out[25]:
date kwota month year
0 2016 8 53231.379310 8 2016
1 2016 9 59883.720930 9 2016
2 2016 10 62235.849057 10 2016
3 2017 2 53500.000000 2 2017
4 2017 3 57300.000000 3 2017
In [26]:
# Line chart of the average sale price per period. `labels` replaces the raw
# column names ('date', 'kwota') on the axes and in the hover text with readable
# captions, written in Polish to match the chart title.
fig = px.line(
    df3,
    x='date',
    y="kwota",
    labels={'date': 'Rok i miesiąc', 'kwota': 'Średnia cena (USD)'},
    color_discrete_sequence=['navy'],
    title="Średnia cena nieruchomości w Filadelfii",
    markers=True,
)
fig.show()
In [27]:
# Unfortunately this data set contains only 5 distinct sale dates, which makes the chart look rather sparse.
# With a larger data set the chart would be richer
# and we could identify an interesting trend in Philadelphia property prices over time.
In [ ]: